*************************************************
* Title: rwanda_jde_table1.do
* Author: Todd Pugatch
* Last update: June 10 2024
*
* Description: analysis for Blimpo and Pugatch, "Entrepreneurship Education
*	and Teacher Training in Rwanda," Stage 2 Registered report, Journal of 
*	Development Economics
*
* Inputs: 	teacher_merge_jde.dta
*			student_merge_jde.dta
*
* Outputs: rwanda_jde_table1.[txt/xls]
* Notes: produces Table 1
**************************************************

* Set environment
* Remember when the run began; the start and end times are displayed at the bottom of the file
local start = "$S_TIME"

* Begin from an empty session: drop data, matrices, Mata objects, graphs and programs
clear
clear matrix
clear mata
graph drop _all
program drop _all

* Close a log left open by an earlier run (capture ignores the error when none is open)
capture log close

* Let output scroll without pausing
set more off


* Set directories
* All paths hang off the global macro main (the project root directory).
* Either uncomment and edit the next line, or define main before running this
* file (for example in a master do-file).
*global main "[SET MAIN DIRECTORY HERE]"

* Stop immediately with a clear message when the project root has not been set.
* Without this check every path below would start at "/" and the script would
* only fail later, at log using, with an error that does not point to the cause.
if `"$main"' == "" {
	display as error "global macro main is not set: define the project root directory before running rwanda_jde_table1.do"
	exit 198
}

	global rawdata "$main/01_data/01_raw"
	global cleandata "$main/01_data/02_clean"
	global dofiles "$main/02_dofiles"
	global results "$main/03_results"
	global temp "$main/04_output"


* open log file (plain text, overwritten on every run)
log using "$temp/rwanda_jde_table1.txt", text replace

* TABLE P1, PANEL A: sample sizes
* Schools (Row 1)
/* School presence is derived from the student data: a school is in the sample for a survey
	round when at least one of its students completed that survey. The student data used here
	omit students added at endline, as described in JDE Registered Report Stage 1. */

quietly use "$cleandata/student_merge_jde.dta", clear

/* Reduce to one row per school: within-school mean of treatment and schoolname_bl,
	within-school maximum of each in-sample flag */
collapse (mean) treatment schoolname_bl (max) insample_bl insample_el, by(school_code)

/* Schools by baseline presence and treatment status (missing values tabulated too) */
tabulate insample_bl treatment, missing

/* Schools by endline presence and treatment status (missing values tabulated too) */
tabulate insample_el treatment, missing

* Teachers (rows 2-4)
quietly use "$cleandata/teacher_merge_jde.dta", clear

/* Row 2: total number of teachers */
	/* by treatment status, among teachers in the baseline sample */
tabulate treatment if insample_bl==1, missing

	/* by treatment status, among teachers in the endline sample */
tabulate treatment if insample_el==1, missing

/* Row 3: endline teachers who were also in the baseline sample
	(both insample_bl and insample_el equal 1) */
tabulate treatment if insample_bl==1 & insample_el==1, missing

/* Row 4: teachers added at endline (in the endline sample, not in the baseline sample) */
tabulate insample_el treatment if insample_el==1 & insample_bl==0, missing

* Students (Row 5)
quietly use "$cleandata/student_merge_jde.dta", clear

/* Students by baseline presence and treatment status (missing values tabulated too) */
tabulate insample_bl treatment, missing

/* Students by endline presence and treatment status (missing values tabulated too) */
tabulate insample_el treatment, missing

* TABLE P1, PANEL B: attrition
/* Teachers (row 1) */
quietly use "$cleandata/teacher_merge_jde.dta", clear

/* Teacher attrition indicator:
	1       = in the baseline sample and not in the endline sample
	0       = every other teacher that is not set to missing
	missing = not in the baseline sample (insample_bl==0), or
	          observed but not surveyed at endline (insample_obs_el==1 & insample_el==0)
	The if-qualifier leaves the excluded teachers missing in a single step. */
quietly generate teacher_attrition = (insample_bl==1 & insample_el==0) ///
	if !(insample_bl==0 | (insample_obs_el==1 & insample_el==0))

/* Mean teacher attrition by treatment status, in two passes:
	1. unadjusted means and difference
	2. same comparison with the test option and randomization strata as covariates
	orth_out is a user-written command and must be installed before running. */

orth_out teacher_attrition using "$results/rwanda_jde_table1.xls", by(treatment) ///
	se compare count colnum ///
	title("teacher attrition (Table P1), unadjusted") replace
xi: orth_out teacher_attrition using "$results/rwanda_jde_table1.xls", by(treatment) ///
	se compare count colnum test covar(i.strata) ///
	title("teacher attrition (Table P1), adjusted for strata") vappend replace
	
/* Students (row 2) */
quietly use "$cleandata/student_merge_jde.dta", clear

* Attrition indicator: 1 minus the endline in-sample flag
* (per the original note, this file already restricts to students in the baseline)
quietly generate student_attrition = 1 - insample_el

/* Mean student attrition by treatment status, in two passes:
	1. unadjusted means and difference
	2. same comparison with the test option and randomization strata as covariates
	Standard errors are clustered by school (school_code) in both passes.
	Both tables are written with vappend to the workbook used for the teacher tables. */

orth_out student_attrition using "$results/rwanda_jde_table1.xls", by(treatment) ///
	se vce(cluster school_code) compare count colnum ///
	title("student attrition (Table P1), unadjusted") vappend replace

xi: orth_out student_attrition using "$results/rwanda_jde_table1.xls", by(treatment) ///
	se vce(cluster school_code) compare count colnum test covar(i.strata) ///
	title("student attrition (Table P1), adjusted for strata") vappend replace

* TRANSFERS
/* Check on transfers between schools, as discussed in JDE Stage 1, Section 3.5.
		Teachers: cannot be checked, because the endline survey did not record the teacher's school in 2016.
		Students: a transfer is a student who
			--appears in both the baseline and endline surveys
			--has baseline and endline school codes that do not match
			--is not a dropout
	Report the overall level of transfers and whether it differs between treatment and control.
	These commands run on the student data loaded for the student attrition step above. */

* Summary statistics of transfer within each treatment group
tabulate treatment, summarize(transfer)

* Treatment-control difference in transfers, absorbing strata, s.e. clustered by school
areg transfer treatment, absorb(strata) cluster(school_code)

* Show the start and end times of the run, then close the log
local end = "$S_TIME"
display "`start'"
display "`end'"
log close
